Introduction¶
In this notebook we will explore the extracted features from the WESAD dataset.
%reload_ext pretty_jupyter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
import sklearn.feature_selection as fs
import seaborn as sns
import plotly_express as px
import plotly.offline as pyo
import matplotlib.pyplot as plt
# Raise pandas' display limits to 100 columns / 100 rows for the previews below.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

# Initialise plotly's offline notebook mode.
pyo.init_notebook_mode()
General Analysis¶
First, we import the dataset.
# Read the combined-subjects feature CSV (relative path) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='../data/03_primary/WESAD/combined_subjects.csv')
Data Preview
| Unnamed: 0 | net_acc_mean | net_acc_std | net_acc_min | net_acc_max | EDA_phasic_mean | EDA_phasic_std | EDA_phasic_min | EDA_phasic_max | EDA_smna_mean | EDA_smna_std | EDA_smna_min | EDA_smna_max | EDA_tonic_mean | EDA_tonic_std | EDA_tonic_min | EDA_tonic_max | BVP_mean | BVP_std | BVP_min | BVP_max | TEMP_mean | TEMP_std | TEMP_min | TEMP_max | ACC_x_mean | ACC_x_std | ACC_x_min | ACC_x_max | ACC_y_mean | ACC_y_std | ACC_y_min | ACC_y_max | ACC_z_mean | ACC_z_std | ACC_z_min | ACC_z_max | 0_mean | 0_std | 0_min | 0_max | BVP_peak_freq | TEMP_slope | subject | label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1.331891 | 0.153556 | 1.014138 | 1.678399 | 2.247876 | 1.112076 | 0.367977 | 4.459367 | 1.592308 | 2.645333 | 3.096905e-08 | 17.418821 | 0.608263 | 1.212010 | -1.213173 | 2.554750 | -0.043934 | 112.391233 | -392.28 | 554.77 | 35.816000 | 0.017436 | 35.77 | 35.87 | 0.024658 | 0.018284 | -0.037843 | 0.087383 | 0.000017 | 0.000013 | -0.000026 | 0.000060 | 0.000017 | 0.000013 | -0.000026 | 0.000060 | 0.027558 | 0.013523 | 0.000000 | 0.087383 | 0.080556 | -0.000102 | 2 | 1 |
| 1 | 1 | 1.218994 | 0.090108 | 1.014138 | 1.485800 | 1.781323 | 1.203991 | 0.232625 | 4.459367 | 1.347750 | 2.666659 | 3.096905e-08 | 17.418821 | 0.731985 | 1.171627 | -1.213173 | 2.477276 | -1.189267 | 120.431399 | -392.28 | 554.77 | 35.796111 | 0.029522 | 35.75 | 35.87 | 0.020313 | 0.019242 | -0.037843 | 0.087383 | 0.000014 | 0.000013 | -0.000026 | 0.000060 | 0.000014 | 0.000013 | -0.000026 | 0.000060 | 0.023420 | 0.015310 | 0.000000 | 0.087383 | 0.144444 | -0.000424 | 2 | 1 |
| 2 | 2 | 1.143312 | 0.110987 | 0.948835 | 1.485800 | 1.173169 | 1.285422 | 0.006950 | 4.459367 | 0.752335 | 1.958546 | 3.096905e-08 | 17.418821 | 1.110242 | 1.112268 | -1.213173 | 2.037179 | 0.280427 | 87.571000 | -357.53 | 371.12 | 35.763056 | 0.044673 | 35.68 | 35.87 | 0.016618 | 0.015316 | -0.021330 | 0.071558 | 0.000011 | 0.000011 | -0.000015 | 0.000049 | 0.000011 | 0.000011 | -0.000015 | 0.000049 | 0.018759 | 0.012604 | 0.000000 | 0.071558 | 0.102778 | -0.000814 | 2 | 1 |
| 3 | 3 | 1.020669 | 0.135308 | 0.811090 | 1.239944 | 0.311656 | 0.278650 | 0.006950 | 1.303071 | 0.198576 | 0.413802 | 3.309990e-08 | 2.788862 | 1.598995 | 0.350355 | 0.959752 | 2.037179 | 0.055833 | 68.797466 | -345.19 | 359.57 | 35.725000 | 0.033491 | 35.66 | 35.81 | 0.022681 | 0.012560 | -0.006881 | 0.054356 | 0.000016 | 0.000009 | -0.000005 | 0.000037 | 0.000016 | 0.000009 | -0.000005 | 0.000037 | 0.022888 | 0.012180 | 0.000688 | 0.054356 | 0.108333 | -0.000524 | 2 | 1 |
| 4 | 4 | 0.887458 | 0.116048 | 0.727406 | 1.125306 | 0.163826 | 0.110277 | 0.006950 | 0.369298 | 0.118080 | 0.237575 | 2.787285e-08 | 1.300810 | 1.342085 | 0.405980 | 0.945946 | 2.037179 | 0.096681 | 43.606312 | -289.26 | 209.89 | 35.701333 | 0.022420 | 35.66 | 35.75 | 0.028105 | 0.010415 | 0.002752 | 0.054356 | 0.000019 | 0.000007 | 0.000002 | 0.000037 | 0.000019 | 0.000007 | 0.000002 | 0.000037 | 0.028105 | 0.010415 | 0.002752 | 0.054356 | 0.147222 | -0.000165 | 2 | 1 |
| 5 | 5 | 0.776920 | 0.071154 | 0.681346 | 0.956575 | 0.155098 | 0.115413 | 0.002306 | 0.369298 | 0.113253 | 0.233061 | 2.787285e-08 | 1.289171 | 1.015119 | 0.158530 | 0.817326 | 1.513996 | -0.642795 | 52.948702 | -289.26 | 209.89 | 35.705056 | 0.023058 | 35.66 | 35.75 | 0.034358 | 0.004849 | 0.002752 | 0.054356 | 0.000024 | 0.000003 | 0.000002 | 0.000037 | 0.000024 | 0.000003 | 0.000002 | 0.000037 | 0.034358 | 0.004849 | 0.002752 | 0.054356 | 0.138889 | 0.000261 | 2 | 1 |
| 6 | 6 | 0.705557 | 0.055554 | 0.608254 | 0.819336 | 0.080122 | 0.092646 | 0.002306 | 0.319375 | 0.048063 | 0.151028 | 2.787285e-08 | 1.105898 | 0.873283 | 0.105136 | 0.656496 | 1.013622 | -0.037437 | 41.045187 | -199.01 | 194.12 | 35.721444 | 0.028090 | 35.66 | 35.77 | 0.031188 | 0.004681 | 0.013761 | 0.039907 | 0.000021 | 0.000003 | 0.000009 | 0.000027 | 0.000021 | 0.000003 | 0.000009 | 0.000027 | 0.031188 | 0.004681 | 0.013761 | 0.039907 | 0.138889 | 0.000460 | 2 | 1 |
| 7 | 7 | 0.639991 | 0.054349 | 0.543110 | 0.725169 | 0.022266 | 0.034928 | 0.000015 | 0.132781 | 0.016674 | 0.090613 | 5.174644e-08 | 0.997037 | 0.732013 | 0.147837 | 0.460235 | 0.999065 | -0.083809 | 35.416182 | -197.37 | 194.12 | 35.753111 | 0.029950 | 35.71 | 35.81 | 0.029377 | 0.004256 | 0.013761 | 0.038531 | 0.000020 | 0.000003 | 0.000009 | 0.000027 | 0.000020 | 0.000003 | 0.000009 | 0.000027 | 0.029377 | 0.004256 | 0.013761 | 0.038531 | 0.152778 | 0.000516 | 2 | 1 |
| 8 | 8 | 0.580220 | 0.054845 | 0.486494 | 0.685270 | 0.024059 | 0.037475 | 0.000015 | 0.167825 | 0.025170 | 0.089431 | 3.297693e-08 | 0.601262 | 0.548576 | 0.180334 | 0.146098 | 0.816318 | 0.548538 | 57.092149 | -367.11 | 363.29 | 35.783667 | 0.033894 | 35.73 | 35.84 | 0.027603 | 0.007144 | -0.002752 | 0.066053 | 0.000019 | 0.000005 | -0.000002 | 0.000045 | 0.000019 | 0.000005 | -0.000002 | 0.000045 | 0.027618 | 0.007088 | 0.000000 | 0.066053 | 0.152778 | 0.000593 | 2 | 1 |
| 9 | 9 | 0.532770 | 0.036903 | 0.474375 | 0.607551 | 0.165363 | 0.216325 | 0.000015 | 0.669836 | 0.152681 | 0.475520 | 3.284132e-08 | 3.622407 | 0.263263 | 0.287734 | -0.202700 | 0.653034 | -0.310028 | 96.934155 | -670.20 | 363.29 | 35.814722 | 0.028076 | 35.75 | 35.87 | 0.028278 | 0.010877 | -0.030962 | 0.074998 | 0.000019 | 0.000007 | -0.000021 | 0.000052 | 0.000019 | 0.000007 | -0.000021 | 0.000052 | 0.028672 | 0.009792 | 0.000000 | 0.074998 | 0.122222 | 0.000447 | 2 | 1 |
We can observe that all of the data is numeric and that there are no missing values. We will remove the first column, as it is just a copy of the index.
# Drop the 'Unnamed: 0' column (a copy of the row index) by name rather than
# by position. A positional drop removes a real feature column every time the
# cell is re-run; with errors='ignore' a re-run is a harmless no-op.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')
Modified Data Preview
| net_acc_mean | net_acc_std | net_acc_min | net_acc_max | EDA_phasic_mean | EDA_phasic_std | EDA_phasic_min | EDA_phasic_max | EDA_smna_mean | EDA_smna_std | EDA_smna_min | EDA_smna_max | EDA_tonic_mean | EDA_tonic_std | EDA_tonic_min | EDA_tonic_max | BVP_mean | BVP_std | BVP_min | BVP_max | TEMP_mean | TEMP_std | TEMP_min | TEMP_max | ACC_x_mean | ACC_x_std | ACC_x_min | ACC_x_max | ACC_y_mean | ACC_y_std | ACC_y_min | ACC_y_max | ACC_z_mean | ACC_z_std | ACC_z_min | ACC_z_max | 0_mean | 0_std | 0_min | 0_max | BVP_peak_freq | TEMP_slope | subject | label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.331891 | 0.153556 | 1.014138 | 1.678399 | 2.247876 | 1.112076 | 0.367977 | 4.459367 | 1.592308 | 2.645333 | 3.096905e-08 | 17.418821 | 0.608263 | 1.212010 | -1.213173 | 2.554750 | -0.043934 | 112.391233 | -392.28 | 554.77 | 35.816000 | 0.017436 | 35.77 | 35.87 | 0.024658 | 0.018284 | -0.037843 | 0.087383 | 0.000017 | 0.000013 | -0.000026 | 0.000060 | 0.000017 | 0.000013 | -0.000026 | 0.000060 | 0.027558 | 0.013523 | 0.000000 | 0.087383 | 0.080556 | -0.000102 | 2 | 1 |
| 1 | 1.218994 | 0.090108 | 1.014138 | 1.485800 | 1.781323 | 1.203991 | 0.232625 | 4.459367 | 1.347750 | 2.666659 | 3.096905e-08 | 17.418821 | 0.731985 | 1.171627 | -1.213173 | 2.477276 | -1.189267 | 120.431399 | -392.28 | 554.77 | 35.796111 | 0.029522 | 35.75 | 35.87 | 0.020313 | 0.019242 | -0.037843 | 0.087383 | 0.000014 | 0.000013 | -0.000026 | 0.000060 | 0.000014 | 0.000013 | -0.000026 | 0.000060 | 0.023420 | 0.015310 | 0.000000 | 0.087383 | 0.144444 | -0.000424 | 2 | 1 |
| 2 | 1.143312 | 0.110987 | 0.948835 | 1.485800 | 1.173169 | 1.285422 | 0.006950 | 4.459367 | 0.752335 | 1.958546 | 3.096905e-08 | 17.418821 | 1.110242 | 1.112268 | -1.213173 | 2.037179 | 0.280427 | 87.571000 | -357.53 | 371.12 | 35.763056 | 0.044673 | 35.68 | 35.87 | 0.016618 | 0.015316 | -0.021330 | 0.071558 | 0.000011 | 0.000011 | -0.000015 | 0.000049 | 0.000011 | 0.000011 | -0.000015 | 0.000049 | 0.018759 | 0.012604 | 0.000000 | 0.071558 | 0.102778 | -0.000814 | 2 | 1 |
| 3 | 1.020669 | 0.135308 | 0.811090 | 1.239944 | 0.311656 | 0.278650 | 0.006950 | 1.303071 | 0.198576 | 0.413802 | 3.309990e-08 | 2.788862 | 1.598995 | 0.350355 | 0.959752 | 2.037179 | 0.055833 | 68.797466 | -345.19 | 359.57 | 35.725000 | 0.033491 | 35.66 | 35.81 | 0.022681 | 0.012560 | -0.006881 | 0.054356 | 0.000016 | 0.000009 | -0.000005 | 0.000037 | 0.000016 | 0.000009 | -0.000005 | 0.000037 | 0.022888 | 0.012180 | 0.000688 | 0.054356 | 0.108333 | -0.000524 | 2 | 1 |
| 4 | 0.887458 | 0.116048 | 0.727406 | 1.125306 | 0.163826 | 0.110277 | 0.006950 | 0.369298 | 0.118080 | 0.237575 | 2.787285e-08 | 1.300810 | 1.342085 | 0.405980 | 0.945946 | 2.037179 | 0.096681 | 43.606312 | -289.26 | 209.89 | 35.701333 | 0.022420 | 35.66 | 35.75 | 0.028105 | 0.010415 | 0.002752 | 0.054356 | 0.000019 | 0.000007 | 0.000002 | 0.000037 | 0.000019 | 0.000007 | 0.000002 | 0.000037 | 0.028105 | 0.010415 | 0.002752 | 0.054356 | 0.147222 | -0.000165 | 2 | 1 |
| 5 | 0.776920 | 0.071154 | 0.681346 | 0.956575 | 0.155098 | 0.115413 | 0.002306 | 0.369298 | 0.113253 | 0.233061 | 2.787285e-08 | 1.289171 | 1.015119 | 0.158530 | 0.817326 | 1.513996 | -0.642795 | 52.948702 | -289.26 | 209.89 | 35.705056 | 0.023058 | 35.66 | 35.75 | 0.034358 | 0.004849 | 0.002752 | 0.054356 | 0.000024 | 0.000003 | 0.000002 | 0.000037 | 0.000024 | 0.000003 | 0.000002 | 0.000037 | 0.034358 | 0.004849 | 0.002752 | 0.054356 | 0.138889 | 0.000261 | 2 | 1 |
| 6 | 0.705557 | 0.055554 | 0.608254 | 0.819336 | 0.080122 | 0.092646 | 0.002306 | 0.319375 | 0.048063 | 0.151028 | 2.787285e-08 | 1.105898 | 0.873283 | 0.105136 | 0.656496 | 1.013622 | -0.037437 | 41.045187 | -199.01 | 194.12 | 35.721444 | 0.028090 | 35.66 | 35.77 | 0.031188 | 0.004681 | 0.013761 | 0.039907 | 0.000021 | 0.000003 | 0.000009 | 0.000027 | 0.000021 | 0.000003 | 0.000009 | 0.000027 | 0.031188 | 0.004681 | 0.013761 | 0.039907 | 0.138889 | 0.000460 | 2 | 1 |
| 7 | 0.639991 | 0.054349 | 0.543110 | 0.725169 | 0.022266 | 0.034928 | 0.000015 | 0.132781 | 0.016674 | 0.090613 | 5.174644e-08 | 0.997037 | 0.732013 | 0.147837 | 0.460235 | 0.999065 | -0.083809 | 35.416182 | -197.37 | 194.12 | 35.753111 | 0.029950 | 35.71 | 35.81 | 0.029377 | 0.004256 | 0.013761 | 0.038531 | 0.000020 | 0.000003 | 0.000009 | 0.000027 | 0.000020 | 0.000003 | 0.000009 | 0.000027 | 0.029377 | 0.004256 | 0.013761 | 0.038531 | 0.152778 | 0.000516 | 2 | 1 |
| 8 | 0.580220 | 0.054845 | 0.486494 | 0.685270 | 0.024059 | 0.037475 | 0.000015 | 0.167825 | 0.025170 | 0.089431 | 3.297693e-08 | 0.601262 | 0.548576 | 0.180334 | 0.146098 | 0.816318 | 0.548538 | 57.092149 | -367.11 | 363.29 | 35.783667 | 0.033894 | 35.73 | 35.84 | 0.027603 | 0.007144 | -0.002752 | 0.066053 | 0.000019 | 0.000005 | -0.000002 | 0.000045 | 0.000019 | 0.000005 | -0.000002 | 0.000045 | 0.027618 | 0.007088 | 0.000000 | 0.066053 | 0.152778 | 0.000593 | 2 | 1 |
| 9 | 0.532770 | 0.036903 | 0.474375 | 0.607551 | 0.165363 | 0.216325 | 0.000015 | 0.669836 | 0.152681 | 0.475520 | 3.284132e-08 | 3.622407 | 0.263263 | 0.287734 | -0.202700 | 0.653034 | -0.310028 | 96.934155 | -670.20 | 363.29 | 35.814722 | 0.028076 | 35.75 | 35.87 | 0.028278 | 0.010877 | -0.030962 | 0.074998 | 0.000019 | 0.000007 | -0.000021 | 0.000052 | 0.000019 | 0.000007 | -0.000021 | 0.000052 | 0.028672 | 0.009792 | 0.000000 | 0.074998 | 0.122222 | 0.000447 | 2 | 1 |
Dataset Column Overview¶
# Print each column's dtype and non-null count, plus the frame's memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2091 entries, 0 to 2090 Data columns (total 44 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 net_acc_mean 2091 non-null float64 1 net_acc_std 2091 non-null float64 2 net_acc_min 2091 non-null float64 3 net_acc_max 2091 non-null float64 4 EDA_phasic_mean 2091 non-null float64 5 EDA_phasic_std 2091 non-null float64 6 EDA_phasic_min 2091 non-null float64 7 EDA_phasic_max 2091 non-null float64 8 EDA_smna_mean 2091 non-null float64 9 EDA_smna_std 2091 non-null float64 10 EDA_smna_min 2091 non-null float64 11 EDA_smna_max 2091 non-null float64 12 EDA_tonic_mean 2091 non-null float64 13 EDA_tonic_std 2091 non-null float64 14 EDA_tonic_min 2091 non-null float64 15 EDA_tonic_max 2091 non-null float64 16 BVP_mean 2091 non-null float64 17 BVP_std 2091 non-null float64 18 BVP_min 2091 non-null float64 19 BVP_max 2091 non-null float64 20 TEMP_mean 2091 non-null float64 21 TEMP_std 2091 non-null float64 22 TEMP_min 2091 non-null float64 23 TEMP_max 2091 non-null float64 24 ACC_x_mean 2091 non-null float64 25 ACC_x_std 2091 non-null float64 26 ACC_x_min 2091 non-null float64 27 ACC_x_max 2091 non-null float64 28 ACC_y_mean 2091 non-null float64 29 ACC_y_std 2091 non-null float64 30 ACC_y_min 2091 non-null float64 31 ACC_y_max 2091 non-null float64 32 ACC_z_mean 2091 non-null float64 33 ACC_z_std 2091 non-null float64 34 ACC_z_min 2091 non-null float64 35 ACC_z_max 2091 non-null float64 36 0_mean 2091 non-null float64 37 0_std 2091 non-null float64 38 0_min 2091 non-null float64 39 0_max 2091 non-null float64 40 BVP_peak_freq 2091 non-null float64 41 TEMP_slope 2091 non-null float64 42 subject 2091 non-null int64 43 label 2091 non-null int64 dtypes: float64(42), int64(2) memory usage: 718.9 KB
Dataset Shape¶
(2091, 44)
Descriptive Statistics¶
Now we will explore the data. We will start by looking at the distribution of the features.
| net_acc_mean | net_acc_std | net_acc_min | net_acc_max | EDA_phasic_mean | EDA_phasic_std | EDA_phasic_min | EDA_phasic_max | EDA_smna_mean | EDA_smna_std | EDA_smna_min | EDA_smna_max | EDA_tonic_mean | EDA_tonic_std | EDA_tonic_min | EDA_tonic_max | BVP_mean | BVP_std | BVP_min | BVP_max | TEMP_mean | TEMP_std | TEMP_min | TEMP_max | ACC_x_mean | ACC_x_std | ACC_x_min | ACC_x_max | ACC_y_mean | ACC_y_std | ACC_y_min | ACC_y_max | ACC_z_mean | ACC_z_std | ACC_z_min | ACC_z_max | 0_mean | 0_std | 0_min | 0_max | BVP_peak_freq | TEMP_slope | subject | label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2.091000e+03 | 2091.000000 | 2091.000000 | 2091.000000 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2091.000000 | 2.091000e+03 | 2.091000e+03 | 2.091000e+03 | 2091.000000 | 2.091000e+03 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 | 2091.000000 |
| mean | 1.966550 | 0.053456 | 1.855260 | 2.089283 | 1.700688e-01 | 1.118879e-01 | 3.403359e-02 | 4.413895e-01 | 1.300774e-01 | 2.672041e-01 | 6.123098e-08 | 1.826972e+00 | -0.041869 | 0.113459 | -0.255890 | 0.108671 | -0.000153 | 49.919955 | -270.200014 | 240.117805 | 33.018114 | 0.026576 | 32.967303 | 33.068604 | 0.011463 | 4.462103e-03 | -0.007762 | 0.030142 | 0.000008 | 3.070164e-06 | -5.340943e-06 | 2.073911e-05 | 0.000008 | 3.070164e-06 | -5.340943e-06 | 2.073911e-05 | 0.029561 | 4.187458e-03 | 0.014872 | 0.048907 | 0.125485 | -0.000015 | 9.404113 | 1.140603 |
| std | 2.657218 | 0.091732 | 2.496197 | 2.789401 | 5.482836e-01 | 4.572540e-01 | 1.063828e-01 | 1.489958e+00 | 4.227190e-01 | 7.230139e-01 | 4.659428e-08 | 5.546362e+00 | 1.226109 | 0.438957 | 1.669225 | 1.201522 | 0.573221 | 40.131618 | 238.855851 | 209.107222 | 1.470879 | 0.020533 | 1.467757 | 1.475477 | 0.028678 | 4.229046e-03 | 0.034671 | 0.032306 | 0.000020 | 2.909809e-06 | 2.385519e-05 | 2.222845e-05 | 0.000020 | 2.909809e-06 | 2.385519e-05 | 2.222845e-05 | 0.009287 | 3.689155e-03 | 0.013045 | 0.018166 | 0.039913 | 0.000565 | 4.706482 | 0.661542 |
| min | 0.091182 | 0.000742 | 0.074363 | 0.100672 | 1.135074e-07 | 1.525014e-08 | 6.445254e-08 | 1.709373e-07 | 8.388991e-08 | 1.827182e-08 | 3.479847e-09 | 1.532876e-07 | -10.033692 | 0.000257 | -25.222599 | -2.216655 | -5.428135 | 2.834831 | -1617.860000 | 7.270000 | 29.381111 | 0.007700 | 29.330000 | 29.430000 | -0.044579 | 5.759282e-16 | -0.088071 | -0.040595 | -0.000031 | 6.403569e-19 | -6.059740e-05 | -2.793162e-05 | -0.000031 | 6.403569e-19 | -6.059740e-05 | -2.793162e-05 | 0.000555 | 8.326673e-16 | 0.000000 | 0.004128 | 0.025000 | -0.003220 | 2.000000 | 0.000000 |
| 25% | 0.307707 | 0.004405 | 0.292997 | 0.321858 | 4.294791e-03 | 5.038863e-03 | 8.542278e-06 | 2.139904e-02 | 3.276647e-03 | 1.583230e-02 | 2.948547e-08 | 1.359000e-01 | -0.795608 | 0.010061 | -0.889842 | -0.753547 | -0.148356 | 23.048587 | -352.120000 | 94.575000 | 32.274222 | 0.014900 | 32.230000 | 32.310000 | -0.020900 | 9.221599e-04 | -0.035779 | 0.000688 | -0.000014 | 6.344950e-07 | -2.461769e-05 | 4.734172e-07 | -0.000014 | 6.344950e-07 | -2.461769e-05 | 4.734172e-07 | 0.023266 | 9.206196e-04 | 0.002064 | 0.037843 | 0.097222 | -0.000290 | 5.000000 | 1.000000 |
| 50% | 0.846770 | 0.017041 | 0.772491 | 0.938149 | 2.018820e-02 | 2.109368e-02 | 3.296700e-04 | 8.417205e-02 | 1.534513e-02 | 5.711463e-02 | 4.831052e-08 | 5.035151e-01 | -0.498252 | 0.034663 | -0.525585 | -0.440061 | -0.003823 | 38.391360 | -197.370000 | 189.240000 | 33.166889 | 0.019317 | 33.130000 | 33.230000 | 0.024675 | 3.285789e-03 | 0.000688 | 0.040595 | 0.000017 | 2.260798e-06 | 4.734172e-07 | 2.793162e-05 | 0.000017 | 2.260798e-06 | 4.734172e-07 | 2.793162e-05 | 0.030536 | 3.228065e-03 | 0.012385 | 0.048164 | 0.127778 | -0.000053 | 9.000000 | 1.000000 |
| 75% | 2.665476 | 0.063217 | 2.516406 | 2.884861 | 1.618663e-01 | 9.929476e-02 | 1.428761e-02 | 4.578447e-01 | 1.231743e-01 | 2.893434e-01 | 7.709707e-08 | 1.908303e+00 | 0.735160 | 0.096417 | 0.565363 | 0.989545 | 0.147601 | 64.203423 | -100.530000 | 323.660000 | 34.011000 | 0.029580 | 33.950000 | 34.070000 | 0.036200 | 6.795670e-03 | 0.021330 | 0.051948 | 0.000025 | 4.675783e-06 | 1.467593e-05 | 3.574300e-05 | 0.000025 | 4.675783e-06 | 1.467593e-05 | 3.574300e-05 | 0.037319 | 6.645341e-03 | 0.025458 | 0.057796 | 0.150000 | 0.000171 | 14.000000 | 2.000000 |
| max | 15.632220 | 1.130964 | 14.720361 | 15.931444 | 1.197433e+01 | 1.044126e+01 | 1.838081e+00 | 2.963154e+01 | 9.223967e+00 | 1.419266e+01 | 2.929251e-07 | 1.172344e+02 | 3.028557 | 9.991237 | 2.890934 | 3.291220 | 4.628719 | 320.678627 | -9.280000 | 1789.000000 | 35.933111 | 0.193635 | 35.910000 | 35.970000 | 0.043367 | 2.607680e-02 | 0.043347 | 0.087383 | 0.000030 | 1.794223e-05 | 2.982528e-05 | 6.012398e-05 | 0.000030 | 1.794223e-05 | 2.982528e-05 | 6.012398e-05 | 0.044579 | 1.874508e-02 | 0.043347 | 0.088071 | 0.319444 | 0.003682 | 17.000000 | 2.000000 |
Feature selection¶
After loading and inspecting the dataset, it's time to find the best features.
# Score every candidate column against the class label with the ANOVA F-test
# (f_classif) and keep the five highest-scoring ones.
# NOTE(review): 'subject' (presumably the participant id) is still among the
# candidate columns -- confirm whether it should be excluded before selection.
candidates = data.drop(columns=['label'])
kBest = fs.SelectKBest(fs.f_classif, k=5)
res = kBest.fit_transform(candidates, data['label'])

# Take the selected names from the same frame that was fitted, so they stay
# aligned with `res` wherever 'label' sits in `data`. The mask is deliberately
# not called `filter`, which would shadow the builtin for the rest of the
# notebook.
support_mask = kBest.get_support()
top_features = candidates.columns[support_mask]

# Reuse data's index so that re-attaching the label aligns row for row.
df = pd.DataFrame(res, columns=top_features, index=data.index)
df = df.join(data['label'])
print(f"""Top features:\n{" ".join(top_features)}""")
Top features: net_acc_std net_acc_max EDA_tonic_mean EDA_tonic_min EDA_tonic_max
# One-hot encode the class label and give the indicator columns readable names.
label_dummies = pd.get_dummies(df["label"]).rename(
    columns={0: "amusement", 1: "baseline", 2: "stress"}
)

# Selected features side by side with the three class indicators.
cdf = pd.concat([df.drop("label", axis=1), label_dummies], axis=1)
corr = cdf.corr()

# Show only the correlations against the class indicators, annotated with values.
class_columns = ["amusement", "baseline", "stress"]
fig = px.imshow(corr[class_columns], text_auto=True)
fig = fig.update_layout(width=500, height=800)
fig.show()
Data analysis by subject¶
from plotly.subplots import make_subplots
import plotly.graph_objects as go
def plot_distribution(feature, nbr_cols=4, subjects=None):
    """Draw one bar chart of ``feature`` per subject on a shared subplot grid.

    Each subject's values are read from
    ``../data/03_primary/WESAD/subject_feats/S<id>_feats_4.csv`` and plotted
    in file row order. Subplots are filled left to right, top to bottom.

    Args:
        feature: Name of the column to plot from every subject file.
        nbr_cols: Number of subplot columns in the grid.
        subjects: Iterable of subject ids to plot. Defaults to the 15 ids
            2-11 and 13-17.
    """
    if subjects is None:
        subjects = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
    else:
        subjects = list(subjects)
    titles = [f'Subject {x}' for x in subjects]

    # Ceiling division: `len // nbr_cols + 1` would add an empty extra row
    # whenever the subject count is an exact multiple of nbr_cols.
    nbr_rows = max(1, -(-len(subjects) // nbr_cols))
    plot = make_subplots(rows=nbr_rows, cols=nbr_cols, subplot_titles=titles)

    for position, sub in enumerate(subjects):
        # Grid coordinates are 1-based in plotly.
        row_idx, col_idx = divmod(position, nbr_cols)
        csv = pd.read_csv(f'../data/03_primary/WESAD/subject_feats/S{sub}_feats_4.csv')
        plot.add_trace(
            # Name the trace so the legend reads "Subject N" instead of "trace N".
            go.Bar(y=csv[feature], name=titles[position]),
            row=row_idx + 1,
            col=col_idx + 1,
        )

    plot.update_layout(height=1000)
    plot.show()
net_acc_std¶
# Per-subject bar charts of net_acc_std.
plot_distribution(feature='net_acc_std')
net_acc_max¶
# Per-subject bar charts of net_acc_max.
plot_distribution(feature='net_acc_max')
EDA_tonic_mean¶
# Per-subject bar charts of EDA_tonic_mean.
plot_distribution(feature='EDA_tonic_mean')
EDA_tonic_min¶
# Per-subject bar charts of EDA_tonic_min.
plot_distribution(feature='EDA_tonic_min')
EDA_tonic_max¶
# Per-subject bar charts of EDA_tonic_max.
plot_distribution(feature='EDA_tonic_max')
The effect of the stress level¶
def get_label(label):
    """Return the rows of the global ``df`` whose ``label`` column equals ``label``.

    The returned frame is re-indexed with a DatetimeIndex that advances one
    second per row, starting at the Unix epoch.
    """
    selected = df.loc[df["label"] == label]
    # Row positions 0..n-1, interpreted as seconds since the epoch.
    row_seconds = pd.RangeIndex(selected.shape[0])
    selected.index = pd.to_datetime(row_seconds, unit='s')
    return selected
# Split by numeric class code. This must run before the relabelling below:
# once the codes are replaced by strings, get_label(0/1/2) matches no rows.
amusement, baseline, stress = (get_label(code) for code in (0, 1, 2))

# Swap the numeric class codes for readable names in a single pass.
df["label"] = df["label"].replace({0: 'amusement', 1: 'baseline', 2: 'stress'})
df
| net_acc_std | net_acc_max | EDA_tonic_mean | EDA_tonic_min | EDA_tonic_max | label | |
|---|---|---|---|---|---|---|
| 0 | 0.153556 | 1.678399 | 0.608263 | -1.213173 | 2.554750 | baseline |
| 1 | 0.090108 | 1.485800 | 0.731985 | -1.213173 | 2.477276 | baseline |
| 2 | 0.110987 | 1.485800 | 1.110242 | -1.213173 | 2.037179 | baseline |
| 3 | 0.135308 | 1.239944 | 1.598995 | 0.959752 | 2.037179 | baseline |
| 4 | 0.116048 | 1.125306 | 1.342085 | 0.945946 | 2.037179 | baseline |
| ... | ... | ... | ... | ... | ... | ... |
| 2086 | 0.003381 | 1.011533 | -0.307217 | -0.320846 | -0.286634 | amusement |
| 2087 | 0.002093 | 1.011533 | -0.301157 | -0.315003 | -0.286634 | amusement |
| 2088 | 0.002072 | 1.011132 | -0.305016 | -0.310237 | -0.296301 | amusement |
| 2089 | 0.001978 | 1.010419 | -0.306060 | -0.310788 | -0.299483 | amusement |
| 2090 | 0.001819 | 1.010117 | -0.311347 | -0.330077 | -0.306007 | amusement |
2091 rows × 6 columns
net_acc_std¶
# net_acc_std over the row index, one coloured line per class label.
fig = px.line(data_frame=df, y="net_acc_std", color="label")
fig.show()
net_acc_max¶
# net_acc_max over the row index, one coloured line per class label.
fig = px.line(data_frame=df, y="net_acc_max", color="label")
fig.show()
EDA_tonic_mean¶
# EDA_tonic_mean over the row index, one coloured line per class label.
fig = px.line(data_frame=df, y="EDA_tonic_mean", color="label")
fig.show()
EDA_tonic_min¶
# EDA_tonic_min over the row index, one coloured line per class label.
fig = px.line(data_frame=df, y="EDA_tonic_min", color="label")
fig.show()
EDA_tonic_max¶
# EDA_tonic_max over the row index, one coloured line per class label.
fig = px.line(data_frame=df, y="EDA_tonic_max", color="label")
fig.show()